## "Statistical Analysis for Repression and Backlash Protests"
## author: "F. Schulte and C. Steinert"
## date: "6/21/2022"


## Load Packages

library(stargazer)
library(sjPlot)
library(sjlabelled)
library(sjmisc)
library(ggplot2)
library(texreg)
library(separationplot)
library(MatchIt)
library(cem)
library(stringr)
library(cobalt)
library(performance)
library(see)
library(lubridate)
library(dplyr)
library(lmtest)
library(sandwich)
library(modelsummary)
library(hrbrthemes)
library(ggpubr)
library(readxl)
library(xtable)


## Load Data
read.csv("final_dataset_submission.csv") -> backlashdata


## DESCRIPTIVE ANALYSIS

#see also file "Figures_Resubmission.RMD"


#Data summary

backlashdata %>% dplyr::select(backlash, leader_arrest, mass_arrest_12, organization_membership, Repression_10days, Repression_50days, share_income_food_log, cult_date, pre_election_period, post_election_period, context_protest_quant, ongoing_armedconflict, avg_income_pc_logged, fs_Security_Apparatus) -> backlashdata_reduced

backlashdata_reduced <- as.data.frame(backlashdata_reduced)
stargazer(backlashdata_reduced) 

### Arrests by group
arrestsbygroup <- backlashdata %>% 
  group_by(identitygroup) %>% 
  count()

### Unique identity groups
identitygroups <- backlashdata$identitygroup %>% unique() %>% sort()
stargazer(identitygroups)

### Backlashes by identitygroups
onlyback <- filter(backlashdata, backlash == 1)
backlashes_by_group <- onlyback  %>% group_by(identitygroup) %>% count()
onlynoback <- filter(backlashdata, backlash == 0)
nobacklashes_by_group <- onlynoback   %>% group_by(identitygroup) %>% count()
identitygroup <- unique(backlashdata$identitygroup)
identitygroup <- as.data.frame(identitygroup)
identitygroup$test <- 1
add1 <- merge(identitygroup, backlashes_by_group, by = "identitygroup", all.x = T, all.y = F)
add1$test <- NULL
add1 <- rename(add1, backlashes = n)
add1$backlashes <- ifelse(is.na(add1$backlashes), 0, add1$backlashes)
add2 <- merge(add1, nobacklashes_by_group, by = "identitygroup", all.x = T)
add2 <- rename(add2, no_backlashes = n)
add2$no_backlashes <- ifelse(is.na(add2$no_backlashes), 0, add2$no_backlashes)
xtable(add2, digits = 0)
uyears <- onlyback %>% group_by(identitygroup, year) %>% count()

### Dependent variable
table(backlashdata$backlash)
backlashdata$backlash_factor <- as.factor(backlashdata$backlash)
levels(backlashdata$backlash_factor) <- c("No Backlash", "Backlash")
## pdf(file = "descriptive.pdf")
p1 <- ggplot(data = backlashdata, mapping = aes(x = backlash_factor)) +
  geom_bar(width = 0.6) +
  ylab("# of arrests") +
  xlab("") +
  theme_minimal() 
p_dt <- layer_data(p1)
p1 + annotate(geom = "text", label = p_dt$count, x = p_dt$x, y = p_dt$y + 20)
## dev.off()

### Stacked bar chart
backlashdata_notna <- backlashdata[!is.na(backlashdata$cowc), ]
## pdf(file = "stackedbarchart.pdf", width=18, height=8)
ggplot(data = backlashdata_notna) + ylab("Share of arrests with backlash by country") +
  xlab("") + scale_fill_manual(values = c("skyblue1","red")) + 
  geom_bar(mapping = aes(x = cowc, fill = backlash_factor), position = "fill") +
  theme_minimal() + theme(legend.title = element_blank())
## dev.off()

# Cases that are neither mass arrest nor leader arrest
backlashdata$different_arrest <- ifelse(backlashdata$mass_arrest_12 == 0 & 
                                          backlashdata$leader_arrest == 0, 1, 0)
backlashdata$both_arrests <- ifelse(backlashdata$mass_arrest_12 == 1 & 
                                      backlashdata$leader_arrest == 1, 1, 0)

table(backlashdata$different_arrest)
table(backlashdata$mass_arrest_12)
table(backlashdata$leader_arrest)
table(backlashdata$both_arrests)
crosstab <- table(backlashdata$mass_arrest_12, backlashdata$leader_arrest)
crosstab
addmargins(crosstab)

# Contigency table arrest types by backlash
t1 <- table(backlash = backlashdata$backlash, leader_arrest = backlashdata$leader_arrest)
addmargins(t1)

t1 <- xtabs(~ mass_arrest_25+leader_arrest + backlash, data=backlashdata)
ftable(t1)
stargazer(ftable(t1)) 




## DATA PREP



#### Prepare variables for analysis
class(backlashdata$share_income_food_log)
backlashdata$share_income_food_log <- as.numeric(backlashdata$share_income_food_log)
class(backlashdata$cult_date)
backlashdata$cult_date <- as.numeric(backlashdata$cult_date)
class(backlashdata$sharerecent_repression_1)
backlashdata$sharerecent_repression_1 <- as.numeric(backlashdata$sharerecent_repression_1)
class(backlashdata$sharerecent_repression_2)
backlashdata$sharerecent_repression_2 <- as.numeric(backlashdata$sharerecent_repression_2)
class(backlashdata$sharerecent_repression_3)
backlashdata$sharerecent_repression_3 <- as.numeric(backlashdata$sharerecent_repression_3)
class(backlashdata$fs_index_total)
backlashdata$fs_index_total <- as.numeric(backlashdata$fs_index_total)
class(backlashdata$fs_Security_Apparatus)
backlashdata$fs_Security_Apparatus <- as.numeric(backlashdata$fs_Security_Apparatus)
class(backlashdata$fs_Economic_Inequality)
backlashdata$fs_Economic_Inequality<- as.numeric(backlashdata$fs_Economic_Inequality)
class(backlashdata$fs_PublicServices)
backlashdata$fs_PublicServices <- as.numeric(backlashdata$fs_PublicServices)
class(backlashdata$fs_DemographicPressures)
backlashdata$fs_DemographicPressures <- as.numeric(backlashdata$fs_DemographicPressures)
class(backlashdata$v2x_polyarchy)
backlashdata$v2x_polyarchy <- as.numeric(backlashdata$v2x_polyarchy)
class(backlashdata$fs_StateLegitimacy)
backlashdata$fs_StateLegitimacy <- as.numeric(backlashdata$fs_StateLegitimacy)
class(backlashdata$v2peapssoc)
backlashdata$v2peapssoc <- as.numeric(backlashdata$v2peapssoc)

# Create binary organization membership-variable 
backlashdata$organization_membership_binary <- ifelse(backlashdata$organization_membership > 0, 1, 0)



#rename variables


rename(backlashdata, context_protest = context_protest_quant) -> backlashdata

rename(backlashdata, democracy = v2x_polyarchy) -> backlashdata

rename(backlashdata, exclusion = v2peapssoc) -> backlashdata

rename(backlashdata, stateness = fs_StateLegitimacy) -> backlashdata

rename(backlashdata, cult_event = cult_date) -> backlashdata

rename(backlashdata, security_apparatus = fs_Security_Apparatus) -> backlashdata

rename(backlashdata, repression_10days = Repression_10days) -> backlashdata

rename(backlashdata, repression_50days = Repression_50days) -> backlashdata






# logistic regression base models with step-wise inclusion (Table 4 Appendix)


bs <- glm(backlash ~ leader_arrest, family = binomial(link = logit), data = backlashdata)
summary(bs)
bs_c <- coeftest(bs, vcov = vcovCL, cluster = ~identitygroup)
bs_c

bs2 <- glm(backlash ~ mass_arrest_12, family = binomial(link = logit), data = backlashdata)
summary(bs2)
bs2_c <- coeftest(bs2, vcov = vcovCL, cluster = ~identitygroup)
bs2_c

bs3 <- glm(backlash ~ mass_arrest_25, family = binomial(link = logit), data = backlashdata)
summary(bs3)
bs3_c <- coeftest(bs3, vcov = vcovCL, cluster = ~identitygroup)
bs3_c

bs4 <- glm(backlash ~ mass_arrest_50, family = binomial(link = logit), data = backlashdata)
summary(bs4)
bs4_c <- coeftest(bs4, vcov = vcovCL, cluster = ~identitygroup)
bs4_c

bs5 <- glm(backlash ~ mass_arrest_100, family = binomial(link = logit), data = backlashdata)
summary(bs5)
bs5_c <- coeftest(bs5, vcov = vcovCL, cluster = ~identitygroup)
bs5_c

bt1 <- glm(backlash ~ leader_arrest + mass_arrest_25, family = binomial(link = logit), data = backlashdata)
summary(bt1)
bt1_c <- coeftest(bt1, vcov = vcovCL, cluster = ~identitygroup)
bt1_c

bt2 <- glm(backlash ~ leader_arrest + mass_arrest_50, family = binomial(link = logit), data = backlashdata)
summary(bt2)
bt2_c <- coeftest(bt2, vcov = vcovCL, cluster = ~identitygroup)
bt2_c

bt3 <- glm(backlash ~ leader_arrest + mass_arrest_100, family = binomial(link = logit), data = backlashdata)
summary(bt3)
bt3_c <- coeftest(bt3, vcov = vcovCL, cluster = ~identitygroup)
bt3_c


#put it all together

stargazer(list(bs, bs2, bs3, bs4, bs5, bt1, bt2, bt3), title = "Logistic regression results", se = list(bs_c[,2], bs2_c[,2], bs3_c[,2], bs4_c[,2], bs5_c[,2], bt1_c[,2], bt2_c[,2]),
          bt3_c[,2])




# logistic regression models (Table 1 manuscript)

#models with leader arrests and mass arrests_12 (without organization membership)

#pooled model

pooled1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period  + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict, family = binomial(link = logit), data = backlashdata)

pooled1_cl <- coeftest(pooled1, vcov = vcovCL, cluster = ~identitygroup)



#country fe

countryfe1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(country), family = binomial(link = logit), data = backlashdata)

countryfe1_cl <- coeftest(countryfe1, vcov = vcovCL, cluster = ~identitygroup)


#group fe
groupfe1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(identitygroup), family = binomial(link = logit), data = backlashdata)

groupfe1_cl <- coeftest(groupfe1, vcov = vcovCL, cluster = ~identitygroup)


#country-year fe
countryearfe1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(country) + factor(year), family = binomial(link = logit), data = backlashdata)

countryearfe1_cl <- coeftest(countryearfe1, vcov = vcovCL, cluster = ~identitygroup)


#group-year fe
groupyearfe1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(identitygroup) + factor(year), family = binomial(link = logit), data = backlashdata)

groupyearfe1_cl <- coeftest(groupyearfe1, vcov = vcovCL, cluster = ~identitygroup)


#put it all together

stargazer(list(pooled1, countryfe1, groupfe1, countryearfe1, groupyearfe1), title = "Logistic regression results", se = list(pooled1_cl[,2], countryfe1_cl[,2], groupfe1_cl[,2], countryearfe1_cl[,2], groupyearfe1_cl[,2]), omit= c("factor(year)", "factor(country)", "factor(identitygroup)"), add.lines=list(c("Year fixed effects", rep("YES", 5))))

#or modelsummary

models <- list(pooled1, countryfe1, groupfe1, countryearfe1, groupyearfe1)
modelsummary(models, stars = TRUE, coef_omit = ".*factor|Intercept")


#logistic regression models with repression intensity variables

#models with leader arrests and mass arrests_12 (without organization membership)



# Prepare disappearances per capita
backlashdata$disappearances_perht <- ((backlashdata$numer_forcdisappear) / (backlashdata$PopTotal)) * 100000
backlashdata$rep_fatalities_perht <- (backlashdata$fatalities_repr_share) * 100000

#pooled model

pooled1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + disappearances_perht + rep_fatalities_perht + share_income_food_log + cult_event + pre_election_period + post_election_period  + context_protest + democracy + ongoing_armedconflict, family = binomial(link = logit), data = backlashdata)

pooled1_cl <- coeftest(pooled1, vcov = vcovCL, cluster = ~identitygroup)



#country fe

countryfe1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days +  disappearances_perht + rep_fatalities_perht + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + ongoing_armedconflict + factor(country), family = binomial(link = logit), data = backlashdata)

countryfe1_cl <- coeftest(countryfe1, vcov = vcovCL, cluster = ~identitygroup)


#group fe
groupfe1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + disappearances_perht + rep_fatalities_perht + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + ongoing_armedconflict + factor(identitygroup), family = binomial(link = logit), data = backlashdata)

groupfe1_cl <- coeftest(groupfe1, vcov = vcovCL, cluster = ~identitygroup)


#country-year fe
countryearfe1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days +  disappearances_perht + rep_fatalities_perht + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + ongoing_armedconflict + factor(country) + factor(year), family = binomial(link = logit), data = backlashdata)

countryearfe1_cl <- coeftest(countryearfe1, vcov = vcovCL, cluster = ~identitygroup)


#group-year fe
groupyearfe1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days +  disappearances_perht + rep_fatalities_perht + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + ongoing_armedconflict + factor(identitygroup) + factor(year), family = binomial(link = logit), data = backlashdata)

groupyearfe1_cl <- coeftest(groupyearfe1, vcov = vcovCL, cluster = ~identitygroup)


#put it all together

stargazer(list(pooled1, countryfe1, groupfe1, countryearfe1, groupyearfe1), title = "Logistic regression results", se = list(pooled1_cl[,2], countryfe1_cl[,2], groupfe1_cl[,2], countryearfe1_cl[,2], groupyearfe1_cl[,2]), omit= c("factor(year)", "factor(country)", "factor(identitygroup)"), add.lines=list(c("Year fixed effects", rep("YES", 5))))

#or modelsummary

models <- list(pooled1, countryfe1, groupfe1, countryearfe1, groupyearfe1)
modelsummary(models, stars = TRUE, coef_omit = ".*factor|Intercept")




## OLS REGRESSION MODELS (table 5 Appendix)


#pooled model

pooled1 <- lm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict, data = backlashdata)

pooled1_cl <- coeftest(pooled1, vcov = vcovCL, cluster = ~identitygroup)


#country fe

countryfe1 <- lm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(country), data = backlashdata)

countryfe1_cl <- coeftest(countryfe1, vcov = vcovCL, cluster = ~identitygroup)


#group fe
groupfe1 <- lm(backlash ~ leader_arrest + mass_arrest_12  + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(identitygroup), data = backlashdata)

groupfe1_cl <- coeftest(groupfe1, vcov = vcovCL, cluster = ~identitygroup)


#country-year fe
countryearfe1 <- lm(backlash ~ leader_arrest + mass_arrest_12  + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(country) + factor(year), family = binomial(link = logit), data = backlashdata)

countryearfe1_cl <- coeftest(countryearfe1, vcov = vcovCL, cluster = ~identitygroup)


#group-year fe
groupyearfe1 <- lm(backlash ~ leader_arrest + mass_arrest_12  + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(identitygroup) + factor(year), data = backlashdata)

groupyearfe1_cl <- coeftest(groupyearfe1, vcov = vcovCL, cluster = ~identitygroup)


#put it all together

stargazer(list(pooled1, countryfe1, groupfe1, countryearfe1, groupyearfe1), title = "OLS regression results", se = list(pooled1_cl[,2], countryfe1_cl[,2], groupfe1_cl[,2], countryearfe1_cl[,2], groupyearfe1_cl[,2]), omit= c("factor(year)", "factor(country)", "factor(identitygroup)"), add.lines=list(c("Year fixed effects", rep("YES", 5))))

#or modelsummary

models <- list(pooled1, countryfe1, groupfe1, countryearfe1, groupyearfe1)
modelsummary(models, stars = TRUE, coef_omit = ".*factor|Intercept")





## VISUALIZE RESULTS: Coefficient plot (Figure 2)


# Coefficient plots
m1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict, family = binomial(link = logit), data = backlashdata)
m2 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(year), family = binomial(link = logit), data = backlashdata)

# Standardize variables for coefficient plot
logregvars <- select(backlashdata, leader_arrest , mass_arrest_12 , repression_10days , repression_50days , share_income_food_log , cult_event , pre_election_period , context_protest , democracy , security_apparatus , avg_income_pc_logged , ongoing_armedconflict)
# Z-standardization
standardized_logreg <- scale(logregvars)
apply(standardized_logreg, 2, sd, na.rm = T)
# Add dependent variable and year variable
standardized_data <- cbind(standardized_logreg, backlashdata$backlash)
standardized_data <- cbind(standardized_data, backlashdata$year)
apply(standardized_data, 2, sd, na.rm = T)
# As data.frame
standardized_data <- as.data.frame(standardized_data)
# Rename variables
standardized_data <- rename(standardized_data, backlash = V13)
standardized_data <- rename(standardized_data, year = V14)

# Re-run models with standardized data
m1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict, family = binomial(link = logit), data = standardized_data)
m2 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(year), family = binomial(link = logit), data = standardized_data)

library(broom)
library(ggplot2)
results_m1 <- m1 %>% tidy()
results_m1 <- results_m1 %>% mutate(lower = estimate - 1.65*std.error, 
                                    upper = estimate + 1.65*std.error, 
                                    lower_95 = estimate -	1.96*std.error, 
                                    upper_95 = estimate +	1.96*std.error,
                                    Scenario = "No fixed effects")

results_m2 <- m2 %>% tidy()
results_m2 <- results_m2 %>% mutate(lower = estimate - 1.65*std.error, 
                                    upper = estimate + 1.65*std.error, 
                                    lower_95 = estimate -	1.96*std.error, 
                                    upper_95 = estimate +	1.96*std.error,
                                    Scenario = "Year fixed effects")


results <- rbind(results_m1, results_m2)
results <- results %>% 
  filter(!term %in% c("factor(year)2017", "factor(year)2018",
                      "factor(year)2019", "factor(year)2020", "(Intercept)"))


level_order <- factor(results$term, level = c("context_protest", 
                                              "pre_election_period", "cult_event", "repression_10days", "repression_50days",
                                              "democracy", "security_apparatus", "share_income_food_log", "avg_income_pc_logged", "ongoing_armedconflict", "mass_arrest_12", "leader_arrest"))  

#pdf(file = "coef_plot_main.pdf")
ggplot(results) +
  geom_linerange(aes(ymin = lower_95, ymax = upper_95,
                     y = estimate, x = level_order, shape = Scenario), 
                 position = position_dodge(width = 0.5), 
                 color = "gray40", size = 0.75) +
  geom_linerange(aes(ymin = lower, ymax = upper,
                     y = estimate, x = level_order, shape = Scenario), 
                 position = position_dodge(width = 0.5), 
                 color = "gray80", size = 1.25) +
  geom_point(aes(y = estimate, x = level_order, shape = Scenario), position = position_dodge(width = 0.5), size = 2) +
  geom_hline(yintercept = 0, lty = 2) +
  xlab("") +
  coord_flip() +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Logged odds of backlash protests \n (standardized coefficients)")
#dev.off()



## Separation plot (Figure 2 Appendix)

# Separation plot
m1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict + factor(identitygroup) + factor(year), family = binomial(link = logit), data = backlashdata)
included_vars <- m1$model
#pdf(file = "separationplot.pdf")
separationplot(m1$fitted.values, included_vars$backlash,
               line = TRUE,
               heading = "", show.expected = T,
               height = 2,
               newplot = F)
#dev.off()




#ROBUSTNESS TESTS

#### CEM Matching (Table 7 & Figure 1 Appendix)

# Robustness tests

######### Matching on grievance indicators ######
#### Create sub-dataset containing only complete observations
subvars <- dplyr::select(backlashdata, c("backlash","leader_arrest", "mass_arrest_12", "organization_membership", "repression_10days",  "repression_50days", "share_income_food_log", "cult_event", "pre_election_period", "post_election_period", "context_protest" , "ongoing_armedconflict", "avg_income_pc_logged", "security_apparatus", "democracy", "exclusion", "country", "identitygroup", "year"))

subvars <- subvars[!is.na(subvars$backlash) & !is.na(subvars$leader_arrest) &
                     !is.na(subvars$mass_arrest_12) & !is.na(subvars$repression_10days) &
                     !is.na(subvars$repression_50days) & !is.na(subvars$share_income_food_log) &
                     !is.na(subvars$cult_event) & !is.na(subvars$pre_election_period) & 
                     !is.na(subvars$post_election_period) & !is.na(subvars$ongoing_armedconflict) & !is.na(subvars$context_protest) & !is.na(subvars$organization_membership) & !is.na(subvars$avg_income_pc_logged) & !is.na(subvars$democracy) & !is.na(subvars$security_apparatus) & !is.na(subvars$exclusion)  & !is.na(subvars$year) & !is.na(subvars$identitygroup) & !is.na(subvars$country), ]


#### Set seed
set.seed(040715)

#### Preprocess data with coarsened exact matching on grievance variables
matched_data <- matchit(leader_arrest ~ repression_10days + avg_income_pc_logged + post_election_period,
                        data = subvars,
                        method = "cem")
matched_data
plot(matched_data, interactive = FALSE)

### Figure 1 Appendix
#pdf(file = "covariate_balance_new.pdf")
love.plot(matched_data)
#dev.off()

#### Create new data frame with matched data
matched <- match.data(matched_data)

#### Re-run main analysis
matched1 <- lm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + ongoing_armedconflict + avg_income_pc_logged + security_apparatus + democracy + exclusion + factor(identitygroup) + factor(year), data = matched, weights = weights)
screenreg(matched1)
matched1_cl <- coeftest(matched1, vcov = vcovCL, cluster = ~identitygroup)

#### Re-run main analysis with matched data

#model 1
po <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + 
            pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  +
            democracy + exclusion, family = binomial(link = logit), data = matched, weights = weights)
pd <- coeftest(po, vcov = vcovCL, cluster = ~identitygroup)


#model2
co <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event +  pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  +
            democracy + exclusion + factor(country), family = binomial(link = logit), data = matched, weights = weights)
ld <- coeftest(co, vcov = vcovCL, cluster = ~identitygroup)


#model3
gr <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + 
            pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  +
            democracy + exclusion + factor(identitygroup), family = binomial(link = logit), data = matched, weights = weights)
fd <- coeftest(gr, vcov = vcovCL, cluster = ~identitygroup)


#model4
cy <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  + democracy + exclusion + factor(country) + factor(year), family = binomial(link = logit), data = matched, weights = weights)

cd <- coeftest(cy, vcov = vcovCL, cluster = ~identitygroup)

## present results
stargazer(list(po, co, gr, cy), title = "CEM results", se = list(pd[,2], ld[,2], fd[,2], cd[,2]),
          omit="factor\\(year", add.lines=list(c("Year fixed effects", rep("YES", 4))))


# Coefficient plots
m1 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + 
            pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  +
            democracy + exclusion, family = binomial(link = logit), data = matched, weights = weights)

m2 <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + 
            pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  +
            democracy + exclusion + factor(year), family = binomial(link = logit), data = matched, weights = weights)


results_m1 <- m1 %>% tidy()
results_m1 <- results_m1 %>% mutate(lower = estimate - 1.65*std.error, 
                                    upper = estimate + 1.65*std.error, 
                                    lower_95 = estimate -	1.96*std.error, 
                                    upper_95 = estimate +	1.96*std.error,
                                    Scenario = "No fixed effects")

results_m2 <- m2 %>% tidy()
results_m2 <- results_m2 %>% mutate(lower = estimate - 1.65*std.error, 
                                    upper = estimate + 1.65*std.error, 
                                    lower_95 = estimate -	1.96*std.error, 
                                    upper_95 = estimate +	1.96*std.error,
                                    Scenario = "Year fixed effects")


results <- rbind(results_m1, results_m2)
results <- results %>% 
  filter(!term %in% c("factor(year)2017", "factor(year)2018",
                      "factor(year)2019", "factor(year)2020", "(Intercept)"))


level_order <- factor(results$term, level = c("context_protest", "post_election_period", "exclusion",
                                              "pre_election_period", "cult_event", "repression_10days", "repression_50days",
                                              "democracy", "security_apparatus", "share_income_food_log", "avg_income_pc_logged", "ongoing_armedconflict", "mass_arrest_12", "leader_arrest"))  

ggplot(results) +
  geom_linerange(aes(ymin = lower_95, ymax = upper_95,
                     y = estimate, x = level_order, shape = Scenario), 
                 position = position_dodge(width = 0.5), 
                 color = "gray40", size = 0.75) +
  geom_linerange(aes(ymin = lower, ymax = upper,
                     y = estimate, x = level_order, shape = Scenario), 
                 position = position_dodge(width = 0.5), 
                 color = "gray80", size = 1.25) +
  geom_point(aes(y = estimate, x = level_order, shape = Scenario), position = position_dodge(width = 0.5), size = 2) +
  geom_hline(yintercept = 0, lty = 2) +
  xlab("") +
  coord_flip() +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("Logged odds of backlash protests")







#### Matching on different covariates

# Robustness tests

######### Matching on grievance indicators ######
#### Create sub-dataset containing only complete observations
subvars <- dplyr::select(backlashdata, c("backlash","leader_arrest", "mass_arrest_12", "organization_membership", "repression_10days",  "repression_50days", "share_income_food_log", "cult_event", "pre_election_period", "post_election_period", "context_protest" , "ongoing_armedconflict", "avg_income_pc_logged", "security_apparatus", "democracy", "exclusion", "country", "identitygroup", "year"))

subvars <- subvars[!is.na(subvars$backlash) & !is.na(subvars$leader_arrest) &
                     !is.na(subvars$mass_arrest_12) & !is.na(subvars$repression_10days) &
                     !is.na(subvars$repression_50days) & !is.na(subvars$share_income_food_log) &
                     !is.na(subvars$cult_event) & !is.na(subvars$pre_election_period) & 
                     !is.na(subvars$post_election_period) & !is.na(subvars$ongoing_armedconflict) & !is.na(subvars$context_protest) & !is.na(subvars$organization_membership) & !is.na(subvars$avg_income_pc_logged) & !is.na(subvars$democracy) & !is.na(subvars$security_apparatus) & !is.na(subvars$exclusion)  & !is.na(subvars$year) & !is.na(subvars$identitygroup) & !is.na(subvars$country), ]


#### Set seed
set.seed(040715)

#### Preprocess data with coarsened exact matching on grievance variables
matched_data <- matchit(leader_arrest ~ cult_event + ongoing_armedconflict + democracy,
                        data = subvars,
                        method = "cem")
matched_data
plot(matched_data, interactive = FALSE)
pdf(file = "covariate_balance_new.pdf")
love.plot(matched_data)
dev.off()

#### Create new data frame with matched data
matched <- match.data(matched_data)

#### Re-run main analysis
matched1 <- lm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + ongoing_armedconflict + avg_income_pc_logged + security_apparatus + democracy + exclusion + factor(identitygroup) + factor(year), data = matched, weights = weights)
screenreg(matched1)
matched1_cl <- coeftest(matched1, vcov = vcovCL, cluster = ~identitygroup)

#### Re-run main analysis with matched data

#model 1
po <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + 
            pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  +
            democracy + exclusion, family = binomial(link = logit), data = matched, weights = weights)
pd <- coeftest(po, vcov = vcovCL, cluster = ~identitygroup)


#model2
co <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event +  pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  +
            democracy + exclusion + factor(country), family = binomial(link = logit), data = matched, weights = weights)
ld <- coeftest(co, vcov = vcovCL, cluster = ~identitygroup)


#model3
gr <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + 
            pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  +
            democracy + exclusion + factor(identitygroup), family = binomial(link = logit), data = matched, weights = weights)
fd <- coeftest(gr, vcov = vcovCL, cluster = ~identitygroup)


#model4
cy <- glm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + context_protest + ongoing_armedconflict + avg_income_pc_logged + security_apparatus  + democracy + exclusion + factor(country) + factor(year), family = binomial(link = logit), data = matched, weights = weights)

cd <- coeftest(cy, vcov = vcovCL, cluster = ~identitygroup)



## present results
stargazer(list(po, co, gr, cy), title = "CEM results", se = list(pd[,2], ld[,2], fd[,2], cd[,2]),
          omit="factor\\(year", add.lines=list(c("Year fixed effects", rep("YES", 4))))






#### Test alternative model specifications (Table 6 Appendix)


#model1

pooled1 <- lm(backlash ~ leader_arrest + mass_arrest_25 + context_protest + fs_Economic_Inequality + ongoing_armedconflict +
                cult_event + pre_election_period + post_election_period + factor(year), data = backlashdata)
pooled1_cl <- coeftest(pooled1, vcov = vcovCL, cluster = ~identitygroup)


#model2

countryfe1 <- lm(backlash ~ leader_arrest + mass_arrest_25 + context_protest + fs_Economic_Inequality + ongoing_armedconflict +
                   cult_event + pre_election_period + post_election_period + log(Days_since_last_repression) + sharerecent_repression_1 + factor(year), data = backlashdata)
countryfe1_cl <- coeftest(countryfe1, vcov = vcovCL, cluster = ~identitygroup)


#model3
groupfe1 <- lm(backlash ~ leader_arrest + mass_arrest_25 + context_protest + fs_Economic_Inequality +  ongoing_armedconflict + cult_event + pre_election_period + post_election_period + log(Days_since_last_repression) + sharerecent_repression_1 +   fs_Factionalized_Elites  + factor(year), data = backlashdata)
groupfe1_cl <- coeftest(groupfe1, vcov = vcovCL, cluster = ~identitygroup)

#model4
countryearfe1 <- lm(backlash ~ leader_arrest + mass_arrest_25 + context_protest + fs_Economic_Inequality + ongoing_armedconflict +
                      cult_event + pre_election_period + post_election_period + log(Days_since_last_repression) + sharerecent_repression_1 +  fs_Factionalized_Elites + fs_PublicServices + factor(year), data = backlashdata)
countryearfe1_cl <- coeftest(countryearfe1, vcov = vcovCL, cluster = ~identitygroup)


#model5
groupyearfe1 <- lm(backlash ~ leader_arrest + mass_arrest_25 + context_protest+  fs_Economic_Inequality  + ongoing_armedconflict + cult_event + pre_election_period + post_election_period + log(Days_since_last_repression) + sharerecent_repression_1 +  fs_Factionalized_Elites + fs_PublicServices + v2juaccnt + factor(year), data = backlashdata)


groupyearfe1_cl <- coeftest(groupyearfe1, vcov = vcovCL, cluster = ~identitygroup)
stargazer(list(pooled1, countryfe1, groupfe1, countryearfe1, groupyearfe1), title = "Regression results", 
          se = list(pooled1_cl[,2], countryfe1_cl[,2], groupfe1_cl[,2], countryearfe1_cl[,2], groupyearfe1_cl[,2]),
          omit="factor\\(year", add.lines=list(c("Year fixed effects", rep("YES", 5))))







### Exclude identity groups one by one 


results <- rep(NA, length(unique(backlashdata$identitygroup)))
resultsList <- list()
for(i in backlashdata$identitygroup){
  pooled <- lm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + context_protest + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + ongoing_armedconflict + avg_income_pc_logged + security_apparatus + democracy + exclusion + factor(identitygroup) + factor(year), data = backlashdata[backlashdata$identitygroup != i, ])
  resultsList[[i]] <- screenreg(pooled)
}
resultsList




### Create fixed effects models for months (Table 8 Appendix)


backlashdata$date <- as.Date(backlashdata$trigger_date, "%d.%m.%y")
backlashdata$month <- month(backlashdata$date)

onlymonth <- lm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days  + context_protest + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + ongoing_armedconflict + avg_income_pc_logged + security_apparatus + democracy + exclusion + factor(month), data = backlashdata)
om_cl <- coeftest(onlymonth, vcov = vcovCL, cluster = ~identitygroup)


countryearfe1 <- lm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days  + context_protest + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + ongoing_armedconflict + avg_income_pc_logged + security_apparatus + democracy + exclusion + factor(country) + factor(month), data = backlashdata)
countryearfe1_cl <- coeftest(countryearfe1, vcov = vcovCL, cluster = ~identitygroup)


groupyearfe1 <- lm(backlash ~ leader_arrest + mass_arrest_12 + repression_10days + context_protest + repression_50days + share_income_food_log + cult_event + pre_election_period + post_election_period + ongoing_armedconflict + avg_income_pc_logged + security_apparatus + democracy + exclusion + factor(identitygroup) + factor(month), data = backlashdata)
groupyearfe1_cl <- coeftest(groupyearfe1, vcov = vcovCL, cluster = ~identitygroup)

#present results
stargazer(list(onlymonth, countryearfe1, groupyearfe1), title = "Regression results", 
          se = list(om_cl[,2], countryearfe1_cl[,2], groupyearfe1_cl[,2]),
          omit="factor\\(year", add.lines=list(c("Year fixed effects", rep("YES", 5))))





### Test the mechanisms with the organization membership-variable and the leader type-variable (Table 2 Manuscript)


b1 <- glm(backlash ~ leader_arrest + organization_membership_binary, family = binomial(link = logit), data = backlashdata)
summary(b1)
b1_cl <- coeftest(b1, vcov = vcovCL, cluster = ~identitygroup)
b1_cl
b2 <- glm(backlash ~ leader_arrest * organization_membership_binary, family = binomial(link = logit), data = backlashdata)
summary(b2)
b2_cl <- coeftest(b2, vcov = vcovCL, cluster = ~identitygroup)
b2_cl
pooled1 <- glm(backlash ~ leader_arrest + organization_membership_binary + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict, family = binomial(link = logit), data = backlashdata)
summary(pooled1)
pl1_cl <- coeftest(pooled1, vcov = vcovCL, cluster = ~identitygroup)
pl1_cl

c1 <- glm(backlash ~ leader_arrest + leader_type_symbolic, family = binomial(link = logit), data = backlashdata)
summary(c1)
c1_cl <- coeftest(c1, vcov = vcovCL, cluster = ~identitygroup)
c1_cl
c2 <- glm(backlash ~ leader_arrest * leader_type_symbolic, family = binomial(link = logit), data = backlashdata)
summary(c2)
c2_cl <- coeftest(c2, vcov = vcovCL, cluster = ~identitygroup)
c2_cl
pooled2 <- glm(backlash ~ leader_arrest + leader_type_symbolic + repression_10days + repression_50days + share_income_food_log + cult_event + pre_election_period + context_protest + democracy + security_apparatus + avg_income_pc_logged + ongoing_armedconflict, family = binomial(link = logit), data = backlashdata)
summary(pooled2)
pl2_cl <- coeftest(pooled2, vcov = vcovCL, cluster = ~identitygroup)
pl2_cl

stargazer(list(b1, b2, c1, c2, pooled1, pooled2), title = "Logistic regression results", se = list(b1_cl[,2], b2_cl[,2], c1_cl[,2], c2_cl[,2], pl1_cl[,2], pl2_cl[,2]), omit= c("factor(year)", "factor(country)", "factor(identitygroup)"), add.lines=list(c("Year fixed effects", rep("YES", 5))))



##Figure 1

#Load Dataset

```{r}

read.csv("final_dataset_submission.csv") -> dataset

#final_dataset -> dataset
```


#Lolli Plot by Regime Type 


#Get regime types and world regions from v-dem for year 2020
library(vdem)

regime_types <- extract_vdem(name_pattern = "v2x_regime", 
                             include_external = TRUE)

regime_types_reduced <- regime_types %>% select(vdem_country_name, vdem_cown ,year, v2x_regime, vdem_country_text_id, vdem_country_id) %>% filter (year == 2020)

#rename variables
rename(regime_types_reduced, regime_type = v2x_regime) -> regime_types_reduced

as.character(regime_types_reduced$regime_type) -> regime_types_reduced$regime_type

regime_types_reduced["regime_type"][regime_types_reduced["regime_type"] == 0] <- "Closed Autocracy"
regime_types_reduced["regime_type"][regime_types_reduced["regime_type"] == 1] <- "Electoral Autocracy"
regime_types_reduced["regime_type"][regime_types_reduced["regime_type"] == 2] <- "Electoral Democracy"
regime_types_reduced["regime_type"][regime_types_reduced["regime_type"] == 3] <- "Liberal Democracy"

rename(regime_types_reduced, vdem = vdem_country_id) -> regime_types_reduced

#backlash as factor
dataset$backlash_factor <- as.factor(dataset$backlash)
levels(dataset$backlash_factor) <- c("No Backlash", "Backlash")

#prepare backlash cases
dataset %>% select(cowc, vdem ,backlash) %>% mutate(backlash = case_when(backlash == "1" ~ 'Backlash',TRUE ~ 'No Backlash')) -> plot_data


#match regime type data with our backlash-dataset
left_join(plot_data, regime_types_reduced, by = c("vdem")) -> plot_data

#rename
rename(plot_data, country = vdem_country_text_id) -> plot_data


#count backlash cases and no backlash cases and calculate percentages
plot_data %>% group_by(country, backlash) %>% count() %>% spread(backlash, n, fill = 0L) %>% mutate(Cases_total = `Backlash` + `No Backlash`) %>% mutate(Percentage_Backlashs = ((`Backlash` / `Cases_total`)*100)) %>% mutate(Percentage_NoBacklashs = ((`No Backlash` / `Cases_total`)*100)) -> plot_data2


plot_data %>% select(country, regime_type) -> plot_data
left_join(plot_data2, plot_data, by = ("country")) -> plot_data2
plot_data2 %>% distinct() -> plot_data2


#Lolli Plot
ggplot(plot_data2, aes(x= reorder(country, Percentage_Backlashs), y=Percentage_Backlashs)) +
  geom_segment( aes(x=country, xend=country, y=0, yend=Percentage_Backlashs), color="grey") +
  geom_point( color="orange", size=2) +
  theme(panel.grid.major.x = element_blank(),panel.border = element_blank(), axis.ticks.x = element_blank()) 


#Plot with Regime Types

closed_autocracy_dt <- plot_data2 %>% filter (regime_type == "Closed Autocracy")
elect_autocracy_dt <- plot_data2 %>% filter (regime_type == "Electoral Autocracy")
elect_democracy_dt <- plot_data2 %>% filter (regime_type == "Electoral Democracy")
liberal_democracy_dt <- plot_data2 %>% filter (regime_type == "Liberal Democracy")


closed_autocracy <- ggplot(closed_autocracy_dt) +
  geom_segment( aes(x=country, xend=country, y=Percentage_Backlashs, yend=Percentage_NoBacklashs), color="grey") +
  geom_point( aes(x=country, y=Percentage_Backlashs), color=rgb(0.7,0.2,0.1,0.5), size=2.1 ) +
  geom_point( aes(x=country, y=Percentage_NoBacklashs), color="gray", size=2.1 ) +
  coord_flip() + 
  theme_ipsum() + scale_y_continuous(breaks=c(0,50,100))

elect_autocracy <- ggplot(elect_autocracy_dt) +
  geom_segment( aes(x=country, xend=country, y=Percentage_Backlashs, yend=Percentage_NoBacklashs), color="grey") +
  geom_point( aes(x=country, y=Percentage_Backlashs), color=rgb(0.7,0.2,0.1,0.5), size=2.1 ) +
  geom_point( aes(x=country, y=Percentage_NoBacklashs), color="gray", size=2.1 ) +
  coord_flip() + 
  theme_ipsum() + scale_y_continuous(breaks=c(0,50,100))

elect_democracy <- ggplot(elect_democracy_dt) +
  geom_segment( aes(x=country, xend=country, y=Percentage_Backlashs, yend=Percentage_NoBacklashs), color="grey") +
  geom_point( aes(x=country, y=Percentage_Backlashs), color=rgb(0.7,0.2,0.1,0.5), size=2.1 ) +
  geom_point( aes(x=country, y=Percentage_NoBacklashs), color="gray", size=2.1 ) +
  coord_flip() + 
  theme_ipsum() + scale_y_continuous(breaks=c(0,50,100))


liberal_democracy <- ggplot(liberal_democracy_dt) +
  geom_segment( aes(x=country, xend=country, y=Percentage_Backlashs, yend=Percentage_NoBacklashs), color="grey") +
  geom_point( aes(x=country, y=Percentage_Backlashs), color=rgb(0.7,0.2,0.1,0.5), size=2.1 ) +
  geom_point( aes(x=country, y=Percentage_NoBacklashs), color="gray", size=2.1 ) +
  coord_flip() + 
  theme_ipsum() + scale_y_continuous(breaks=c(0,50,100))

##arrange plots

ggarrange(closed_autocracy, elect_autocracy, elect_democracy, liberal_democracy + rremove("x.text"), 
          labels = c("Closed Autocracy", "Electoral Autocracy", "Electoral Democracy", "Liberal Democracy"),
          ncol = 2, nrow = 2) -> plot


plot













